In this script we assess whether perturbation indicators are associated with any of the technical covariates.
First we read the results generated by
perturbation_propensity.R:
results_file <- paste0(
.get_config_path("LOCAL_SCEPTRE2_DATA_DIR"),
"results/perturbation_propensity_analysis/results.rds"
)
results <- readRDS(results_file)
results
## # A tibble: 4,524 × 8
## paper dataset ntc covariate estimate std_error zvalue pvalue
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 frangieh co_culture ONE-NON-GENE… n_nonzero -1.60e-3 0.00116 -1.38 0.169
## 2 frangieh co_culture ONE-NON-GENE… n_umis 2.48e-5 0.0000950 0.260 0.795
## 3 frangieh co_culture ONE-NON-GENE… cluster_x 2.82e-1 0.239 1.18 0.238
## 4 frangieh co_culture ONE-NON-GENE… cluster_y 3.62e-1 0.205 1.76 0.0777
## 5 frangieh co_culture ONE-NON-GENE… s_score -1.02e-1 1.95 -0.0523 0.958
## 6 frangieh co_culture ONE-NON-GENE… g2m_score -2.56e+0 1.47 -1.74 0.0812
## 7 frangieh co_culture ONE-NON-GENE… phaseG2M 9.58e-1 0.781 1.23 0.220
## 8 frangieh co_culture ONE-NON-GENE… phaseS 7.95e-2 0.685 0.116 0.908
## 9 frangieh co_culture ONE-NON-GENE… batchB3 6.37e-1 0.631 1.01 0.312
## 10 frangieh co_culture ONE-NON-GENE… batchC3 -3.76e-1 0.768 -0.489 0.625
## # … with 4,514 more rows
Next, for each dataset, we plot the \(p\)-values for each covariate across NTCs:
datasets <- results |>
arrange(paper) |>
pull(dataset) |>
unique()
for (dataset in datasets) {
results_dataset <- results |> filter(dataset == !!dataset)
paper <- results_dataset |>
pull(paper) |>
unique()
p_hist <- results_dataset |>
ggplot(aes(x = pvalue)) +
geom_histogram(colour = "black", binwidth = 0.1, boundary = 0) +
facet_wrap(~covariate, scales = "free") +
scale_x_continuous(limits = c(0, 1)) +
labs(title = sprintf("%s %s", paper, dataset)) +
theme_bw()
plot(p_hist)
p_qq <- results_dataset |>
ggplot(aes(y = pvalue)) +
stat_qq_points() +
stat_qq_band() +
geom_abline() +
facet_wrap(~covariate, scales = "free") +
scale_x_continuous(trans = revlog_trans(base = 10)) +
scale_y_continuous(trans = revlog_trans(base = 10)) +
labs(title = sprintf("%s %s", paper, dataset)) +
theme_bw()
plot(p_qq)
}